home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Gamers Delight 2
/
Gamers Delight 2.iso
/
Aminet
/
game
/
misc
/
Life.lzh
/
Life
/
src
/
life68.asm
< prev
next >
Wrap
Assembly Source File
|
1993-03-13
|
6KB
|
259 lines
;
; Life in 68000!
;
public _dogen
;
; This is the world's fastest life code, written for the 68000.
; (Actually, I've discovered an even faster algorithm, but the
; margin is too small to contain it.)
;
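; Input is (the number before each argument is its byte offset from sp
; once the 14 registers have been saved on entry, i.e. 56 bytes of saved
; registers plus the 4-byte return address):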
;
; 60 long *s ; // old life generation we compute new one from
; 64 long *d ; // where to put the result; must also be generation before s
; // if not generation before s, set `m' array to all 1's.
; 68 short w ; // the width of the input array in *bytes*; must be divisible
; // by 4 and <= 128; this is the width to do *now*.
; 70 short h ; // the height of the input array in pixels; no constraints.
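; 72 long *m ; // an array that indicates what's changing on the screen.
; // must be `h+1' longwords long, and the actual `m' pointer
; // passed has to point to the second longword. The first
; // longword (actually *(m-1)) should always be 0.
; // NOTE(review): the code also zeroes and later reads m[h],
; // which is one longword past the `h+1' stated here --
; // confirm that callers really allocate `h+2' longwords.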
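; 76 long *t ; // a temporary array we use internally; must be 6 * w bytes
; // long.
; // NOTE(review): the three row buffers inside t are placed
; // 2 * mod bytes apart, so this size presumably assumes
; // mod == w -- confirm against the caller.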
; 80 long *lft;// the array to `modify' for left-changes
; 84 long *rht;// the array to store the right changes in
; 88 long mod; // the amount to add to go to the next row
;
; All pointers must be long-word aligned for reasonable speed.
;
; This is our main and only entry point.
;
_dogen:
;
; dogen(s, d, w, h, m, t, lft, rht, mod) -- compute one Life generation.
;
; All arguments are read from the stack. d1-d7/a0-a6 are saved and
; restored; d0 is returned as 0. The work is done in three phases:
;
;   1. grow the `m' (modified) masks vertically and clip them to the
;      active width;
;   2. for every row that needs it, build per-longword horizontal sums
;      (left + centre + right, as a 2-bit count) in one of three rotating
;      buffers inside `t';
;   3. for every row whose mask is non-zero, combine three rows of sums
;      into the new cells, store the longwords that differ from what is
;      already in `d', and rebuild that row's mask from what changed.
;
; In `m' one bit stands for one longword (32 pixels) of a row; bit 31 is
; the first longword of the row.
;
        movem.l d1-d7/a0-a6,-(sp)
;
; 14 registers (56 bytes) now sit above the 4-byte return address, so the
; first argument is at 60(a7).
;
; Our first order of business is to take our `modified' array and `grow' it
; by one row in each vertical direction. This is because a change in a cell
; can affect each closest neighbor, so we have to recompute them as well.
;
; Only the vertical part is done here: the mask of each row becomes the OR
; of the original masks of the row above, the row itself and the row below.
; Horizontal spreading is handled further down (sk3..sk8), where a change in
; an edge pixel of a longword also marks the adjacent longword.
;
; d4 = clip mask with the top w/4 bits set: d0 = (128 - w) / 4 is the number
; of unused longword columns, and -(1 << d0) clears exactly that many low
; bits.
;
        move.l  #128,d0
        sub.w   68(a7),d0
        lsr.w   #2,d0
        move.l  #1,d4
        lsl.l   d0,d4
        neg.l   d4
;
; Zero the masks of the border rows: m[h-1], m[h] and m[0]. The
; post-increment leaves a0 pointing at m[1]. d2 carries the original mask
; of the previous row through the loop and starts out as 0.
;
        move.l  72(a7),a0
        move.l  #0,d1
        move.w  70(a7),d1
        lsl.l   #2,d1
        move.l  #0,d2
        move.l  d2,-4(a0,d1.l)
        move.l  d2,(a0,d1.l)
        move.l  d2,(a0)+
;
; Rows 1 .. h-2 are grown, i.e. h-2 iterations (we enter at the dbra).
; For h < 2 the subtraction borrows; without the bcs the counter would wrap
; and dbra would run roughly 65000 times, writing far past the end of `m'.
; In that case there are no interior rows, so skip the loop entirely.
;
        move.w  70(a7),d1
        sub.w   #2,d1
        bcs     grown
        bra     orvb
orvl:
;
; d0 = original m[i]; m[i] = (previous original | m[i] | m[i+1]) & clip.
;
        move.l  (a0),d0
        or.l    d0,d2
        or.l    4(a0),d2
        and.l   d4,d2
        move.l  d2,(a0)+
        move.l  d0,d2
orvb:
        dbra    d1,orvl
grown:
;
; Now our growing is finished. We initialize things for our outer loop, which
; runs down the scan lines. Our register usage for the outer loop is as
; follows:
;
; a0: pointer to the m entry of the row being recomputed
; a1: pointer to source (the row being recomputed)
; a2: pointer to destination (the row being recomputed)
; a3, a4, a5: pointer to rotating portions of t array, 2*mod bytes apart
; d1: number of rows still to do (dbra counter)
; a6: row increment (amount to add to go to next row)
;
; d5 only carries `mod' during this setup; later it accumulates the new
; modified mask of the current row. Note that the row pointers are stepped
; with word-sized operations (sub.w / a6.w), so only the sign-extended low
; 16 bits of `mod' take part.
;
; We initialize things to point to a fake row before the bitmaps. This is
; necessary because we need to compute the horizontal sums early. We won't
; ever write to these areas, because the modified bits will always be zero
; (if the user has made sure that *(m-1) is always zero.)
;
        move.l  88(a7),d5
        move.l  d5,d0
        add.l   d0,d0
        move.l  d5,a6
        move.l  76(a7),a3
        lea     (a3,d0.l),a4
        lea     (a4,d0.l),a5
        move.l  60(a7),a1
        move.l  64(a7),a2
        move.l  72(a7),a0
        sub.w   #4,a0
        sub.w   d5,a1
        sub.w   d5,a2
        move.w  70(a7),d1
        bra     outb
;
; This is the part of our outer loop that computes the horizontal sums.
; The sums are built for the row one below the row a0/a1/a2 refer to, and
; they are needed if that row or either of its vertical neighbors has to be
; recomputed: d7 = m[row-1] | m[row] | m[row+1] as seen from the row being
; summed. We only actually do the work if necessary.
;
; If we don't need to compute these, we also don't need to compute the
; actual row itself, so we skip past everything. Pretty cool, eh?
;
outm:
        move.l  (a0),d7
        or.l    4(a0),d7
        or.l    8(a0),d7
        beq     skippastall
;
; d1/a1/a2 are borrowed as scratch: a1 walks the next source row, a2 walks
; the buffer at a3 that receives two longwords (low and high bit of the sum)
; per source longword.
;
        movem.l d1/a1/a2,-(sp)
        lea     (a1,a6.w),a1
        move.l  a3,a2
agh:
;
; Shift the next mask bit into carry; clear means this longword is skipped.
;
        add.l   d7,d7
        bcc     skipi
aghi:
;
; d0 = centre longword.
; d2 = centre shifted right by one, with the lowest bit of the preceding
;      byte in memory (-5(a1) after the post-increment) rotated in on top.
; d1 = centre shifted left by one, with the top bit of the following byte
;      in memory added in at the bottom.
;
        move.l  (a1)+,d0
        move.l  d0,d1
        move.l  d0,d2
        move.b  -5(a1),d3
        lsr.b   #1,d3
        roxr.l  #1,d2
        move.b  (a1),d3
        add.b   d3,d3
        addx.l  d1,d1
;
; Bitwise full adder over the three values: d1 = low bit of the count,
; d3 = high bit of the count.
;
        move.l  d1,d3
        and.l   d2,d3
        eor.l   d2,d1
        move.l  d0,d2
        and.l   d1,d2
        eor.l   d0,d1
        or.l    d2,d3
        move.l  d1,(a2)+
        move.l  d3,(a2)+
        add.l   d7,d7
        bcs     aghi
skipone:
;
; Flags are still those of the add.l above: zero means no mask bits remain.
;
        beq     agx
skipi:
;
; Step over one source longword and its two sum longwords.
;
        add.w   #4,a1
        add.w   #8,a2
        add.l   d7,d7
        bcs     aghi
        bne     skipi
agx:
        movem.l (sp)+,d1/a1/a2
;
; Okay, with that out of the way we can compute the new values. As we compute
; the new values, we compare our results with the old results. If the old
; results are different, we set a modified bit in our new array. Clever, eh?
;
; Here a3 holds the sums of the row below, a5 those of the row being
; recomputed and a4 those of the row above. d7 walks the row's old mask from
; bit 31 down, d6 is the bit number of the current longword, and d5 collects
; the new mask, which replaces the old one at `max'.
;
        move.l  (a0),d7
        beq     skippastall
;
; 24 more bytes go on the stack here, so inside this section the arguments
; are 24 bytes further away: 96(a7) = m, 104(a7) = lft, 108(a7) = rht.
;
        movem.l d1/a1-a5,-(sp)
        move.w  #32,d6
        move.l  #0,d5
        move.l  #0,(a0)
mal:
        sub.w   #1,d6
        add.l   d7,d7
        bcc     skipon3
mal2:
;
; Add the three low sum bits: d0 = bit 0 of the 3x3 total, d1 = carry.
;
        move.l  (a3)+,d0
        move.l  d0,d1
        move.l  (a4)+,d2
        eor.l   d2,d0
        and.l   d2,d1
        move.l  (a5)+,d2
        move.l  d2,d3
        and.l   d0,d3
        eor.l   d2,d0
        or.l    d3,d1
;
; Add the three high sum bits onto that carry: d1 = bit 1 of the total,
; d2 = XOR of the carries out of bit 1, i.e. bit 2 of the total.
;
        move.l  (a3)+,d2
        move.l  d2,d3
        and.l   d1,d2
        eor.l   d3,d1
        move.l  (a4)+,d3
        move.l  d3,d4
        and.l   d1,d3
        eor.l   d4,d1
        eor.l   d3,d2
        move.l  (a5)+,d3
        move.l  d3,d4
        and.l   d1,d3
        eor.l   d4,d1
        eor.l   d3,d2
;
; new = (bit0 == bit1) & (bit1 | cell) & (bit2 ^ bit1)
; With the total counting the cell itself, this is set exactly when the
; total is 3, or when it is 4 and the cell is currently set.
;
        not.l   d0
        eor.l   d1,d0
        eor.l   d1,d2
        or.l    (a1)+,d1
        and.l   d1,d0
        and.l   d2,d0
;
; d2 = bits that differ from what the destination already holds. If nothing
; differs there is nothing to store or to mark.
;
        move.l  (a2)+,d2
        eor.l   d0,d2
        beq     mal
;
; Top bit of the difference set: the first pixel of this longword changed,
; so the longword before it must be marked too.
;
        bpl     sk3
        sub.w   #31,d6
        bcs     sk9
;
; d6 was 31: this is the first longword of the row, so the change is
; recorded in bit 0 of the lft entry with the same index as the current m
; entry (d1 = lft - m, byte 3 is the low byte of that longword).
;
        move.l  104(a7),d1
        sub.l   96(a7),d1
        or.b    #1,3(a0,d1.l)
        bra     sk5
sk9:
;
; bset takes the bit number modulo 32, so with d6 - 31 this sets bit d6 + 1.
;
        bset    d6,d5
sk5:
        add.w   #31,d6
sk3:
;
; Mark this longword itself, then test the lowest bit of the difference:
; if the last pixel changed, the following longword is marked as well.
;
        bset    d6,d5
        lsr.b   #1,d2
        bcc     sk4
        sub.w   #1,d6
        bcc     sk7
;
; d6 was 0: this is longword number 32 of the row. Append the address of the
; current m entry to the rht list and write the advanced list pointer back
; into the argument slot. a0 is restored by the second exg; d1 is dead.
;
        exg     d1,a0
        move.l  108(a7),a0
        move.l  d1,(a0)+
        move.l  a0,108(a7)
        exg     d1,a0
        bra     sk8
sk7:
        bset    d6,d5
sk8:
        add.w   #1,d6
sk4:
;
; Store the new longword and move on to the next mask bit.
;
        move.l  d0,-4(a2)
        sub.w   #1,d6
        add.l   d7,d7
        bcs     mal2
        beq     max
skipon3:
;
; Skip one longword: step all three sum buffers, source and destination.
;
        add.w   #8,a3
        add.w   #8,a4
        add.w   #8,a5
        add.w   #4,a1
        add.w   #4,a2
        sub.w   #1,d6
        add.l   d7,d7
        bcs     mal2
        bne     skipon3
max:
        move.l  d5,(a0)
        movem.l (sp)+,d1/a1-a5
skippastall:
;
; Advance to the next row and rotate the sum buffers so that
; (a3, a4, a5) becomes (old a4, old a5, old a3): the buffer just filled
; becomes the middle row and the oldest one is reused next.
;
        lea     (a1,a6.w),a1
        lea     (a2,a6.w),a2
        add.w   #4,a0
        exg     a3,a4
        exg     a4,a5
outb:
        dbra    d1,outm
fini:
;
; Terminate the rht list with a zero longword (84(a7) holds the list
; pointer as advanced above), restore the registers and return 0.
;
        move.l  84(a7),a0
        move.l  #0,(a0)
        movem.l (sp)+,d1-d7/a0-a6
        move.l  #0,d0
        rts